
*** This file recreates Table 2 and Panel B of Table 3 using recursive out-of-sample (OOS) forecasts.
*** It computes the OOS R2 and several statistics used to assess its significance.
 
* ---------------------------------------------------------------------------
* Setup: load the data, declare the monthly time series, build the lagged
* predictors, and initialise the constants and the results matrix that the
* forecasting loop further down relies on.
* ---------------------------------------------------------------------------
clear

use mydata

* Force every variable name to lower case so later references are uniform.
rename *, lower

* Reset any stored estimates (eststo is part of the estout package).
eststo clear

* Total number of observations; the loop derives the forecast count from it.
global ssize = _N

* Monthly date index: mofd() converts the date variable to a monthly date,
* which is then declared as the time variable.
generate dm = mofd(date)
format %tm dm
tsset dm

* One-period lags of the control variables (names prefixed with l1).
foreach ctl in mktrf amihud tover hml smb {
    generate l1`ctl' = l1.`ctl'
}

* One-period lag of the dependent variable syy.
generate lsyy = l1.syy

local num = 1

* Critical values from the t distribution with 203 degrees of freedom.
* invt() returns the lower-tail quantile, so the sign is flipped to make the
* values positive. NOTE(review): 203 is hard-coded; confirm it matches the
* number of OOS forecasts minus one for the data in use.
global t5 = -invt(203, 0.05)
global t10 = -invt(203, 0.1)

* Length of the initial training window, in observations.
local train 120

* Snapshot of the prepared data; the loop reloads it after each flow variable.
save oss_temp, replace

* Results matrix: 7 rows of statistics by 6 columns (one per flow variable).
matrix OOS_T = J(7, 6, .)

* Column counter into OOS_T.
local x = 1

* ---------------------------------------------------------------------------
* Recursive out-of-sample (OOS) evaluation, one pass per flow variable.
*
* For each flow variable the loop
*   1. residualises the lagged flow on the control variables,
*   2. regresses syy on the residual flow over an expanding window and stores
*      the one-step-ahead fitted value, slope and t-value,
*   3. compares the squared forecast errors with those of the historical-mean
*      benchmark (OOS R2, Clark and West statistic, lower bounds),
*   4. writes the statistics into one column of OOS_T and reloads oss_temp.
*
* Relies on names defined before the loop: locals train and x, globals ssize,
* t5 and t10, matrix OOS_T and the saved dataset oss_temp.
* ---------------------------------------------------------------------------
foreach flowvar in hfflow hfflow_ma hfflow_chg active active_ma active_chg {

    // Lagged flow, before it is purged of the controls.
    gen xvar = l1.`flowvar'

    // Control variables used to residualise the flow.
    local ctrlvar lsyy l1mktrf l1amihud l1tover l1hml l1smb

    // Row 1 holds the column index as an identifier of the flow variable.
    matrix OOS_T[1,`x'] = `x'

    local period `train'
    scalar define k = $ssize - `period'
    // Number of periods left for forecasting. scalar() makes sure the scalar
    // is read even if the data hold a variable that the name k would match.
    local y = scalar(k)

    gen yhat_a = .   // forecast of the alternative model (uses residual flow)
    gen yhat_n = .   // forecast of the null model (historical mean of syy)
    gen betas = .    // slope of each predictive regression
    gen tvalues = .  // t-value of each slope

    forvalues i = 1/`y' {

        // Observation being forecast in this iteration.
        local target = `period' + `i'

        // Residual flow: regress the lagged flow on the controls using data
        // up to and including the target observation, then keep residuals.
        qui reg xvar `ctrlvar' if _n <= `target'
        qui predict res_flow if _n <= `target', residuals

        // Training model: syy on residual flow, strictly before the target.
        qui reg syy res_flow if _n < `target'

        // Store forecast, slope and t-value on the target observation only.
        qui replace yhat_a = _b[_cons] + _b[res_flow]*res_flow if _n == `target'
        qui replace betas = _b[res_flow] if _n == `target'
        // Fix: this line used to have no if qualifier, so every iteration
        // overwrote all observations and the reported average t-value was
        // just the t-value of the last window.
        qui replace tvalues = _b[res_flow]/_se[res_flow] if _n == `target'

        // Null model: mean of syy over the same training window.
        qui su syy if _n < `target'
        qui replace yhat_n = r(mean) if _n == `target'

        drop res_flow
    }

    // Squared prediction errors of the two models.
    gen se_a = (syy - yhat_a)^2
    gen se_n = (syy - yhat_n)^2

    su se_a
    scalar define MSE_A = r(mean)
    su se_n
    scalar define MSE_N = r(mean)

    // Averages of the recursive slopes and t-values.
    su betas
    scalar define mean_beta = r(mean)
    su tvalues
    scalar define mean_tvalue = r(mean)
    // t-test of the mean slope against zero; shown in the log, and t_beta is
    // kept as a scalar although it is not written to OOS_T.
    ttest betas = 0
    scalar define t_beta = r(t)

    // OOS R2 in percent.
    scalar define OOSR2 = (1 - MSE_A/MSE_N)*100
    dis "******************Training Period is `train'*******************************"
    dis "OOS-R2:  " OOSR2
    dis "***************************************************************************"

    matrix OOS_T[2,`x'] = OOSR2

    // Clark and West (2007) adjusted statistic with bootstrap standard error.
    // NOTE(review): no seed is set in this block, so the bootstrap results
    // change from run to run unless a seed is set before the loop.
    gen CW = se_n - se_a + (yhat_n - yhat_a)^2

    qui reg CW, vce(bootstrap, rep(1000))
    scalar define t_CW = _b[_cons]/_se[_cons]
    matrix OOS_T[3,`x'] = t_CW

    // Implied standard error of the OOS R2 and one-sided lower bounds.
    scalar define seCW = OOSR2/t_CW
    matrix OOS_T[4,`x'] = OOSR2 - seCW * $t10
    matrix OOS_T[5,`x'] = OOSR2 - seCW * $t5

    matrix OOS_T[6,`x'] = mean_beta
    matrix OOS_T[7,`x'] = mean_tvalue

    // Move to the next column and restore the clean dataset.
    local ++x
    use oss_temp, clear
}

* ---------------------------------------------------------------------------
* Label, print and export the OOS results matrix.
* ---------------------------------------------------------------------------

* Row labels, listed in the row order of OOS_T.
local rowlabels FlowVar OOS-R2(%) OOS-T_CWbs LowB-10% LowB-5% avg_beta avg_tval
matrix rownames OOS_T = `rowlabels'

* Print the table with three decimals.
matrix list OOS_T, format(%9.3f)

* Write the matrix, including its row and column names, to the workbook with
* the upper-left corner at cell A1.
putexcel set T2&T3B, modify
putexcel A1 = matrix(OOS_T), names


